package analyzeExcel;

import java.nio.charset.Charset;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import cn.hutool.core.io.file.FileReader;
import cn.hutool.core.io.file.FileWriter;
import cn.hutool.http.HttpUtil;


/**
 * Small crawler utility that downloads a novel chapter by chapter.
 * <p>
 * Original header: author FH (Q 3 1359 679 0), last modified 2016-03-24.
 * <p>
 * The steps are independent static methods that hand data to each other
 * through files:
 * <ol>
 *   <li>{@link #getUlrs()} fetches the chapter list page and appends one
 *       {@code title,href} line per chapter link to {@code F:/xiaoshuourl.txt};</li>
 *   <li>{@link #getContext()} reads those lines, fetches each chapter page and
 *       appends the chapter title and text to {@code F:/xiaoshuo.txt};</li>
 *   <li>{@link #clear()} reads {@code F:/倾念其琛.txt} and prints every line with
 *       its length.</li>
 * </ol>
 */
public class GetWeb {

	/** Scheme and host that are prepended to the hrefs stored in the URL index file. */
	private static final String SITE_ROOT = "http://www.xiangcunxiaoshuo.la";

	/** Address of the page that lists all chapter links. */
	private static final String CHAPTER_LIST_URL = SITE_ROOT + "/html/419524/";

	/** Charset handed to {@code HttpUtil.get} for every page request. */
	private static final Charset PAGE_CHARSET = Charset.forName("GBK");

	/** File holding one {@code title,href} line per chapter. */
	private static final String URL_INDEX_FILE = "F:/xiaoshuourl.txt";

	/** File the downloaded chapter text is appended to. */
	private static final String NOVEL_FILE = "F:/xiaoshuo.txt";

	/** File whose lines are inspected by {@link #clear()}. */
	private static final String INSPECTED_FILE = "F:/倾念其琛.txt";

	/** Separator between title and href in the URL index file. */
	private static final char FIELD_SEPARATOR = ',';

	/**
	 * Entry point. Only the inspection step is enabled; {@link #getUlrs()} and
	 * {@link #getContext()} are meant to be switched in by hand when a fresh
	 * download is needed.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		clear();
	}

	/**
	 * Prints, for every line of the inspected file, first the line length and
	 * then the line itself. Nothing is written back to disk.
	 */
	public static void clear() {
		List<String> lines = new FileReader(INSPECTED_FILE).readLines();
		for (String line : lines) {
			System.out.println(line.length());
			System.out.println(line);
		}
	}

	/**
	 * Requests the chapter list page and records every chapter link.
	 * <p>
	 * Each anchor matched by {@code dd > a[href]} is logged to standard output
	 * and appended to the URL index file as {@code title,href}.
	 */
	public static void getUlrs() {
		// Request the list page.
		String listContent = HttpUtil.get(CHAPTER_LIST_URL, PAGE_CHARSET);
		Document doc = Jsoup.parse(listContent);
		FileWriter writer = new FileWriter(URL_INDEX_FILE);

		Elements links = doc.select("dd > a[href]");
		for (Element link : links) {
			String linkHref = link.attr("href");
			String linkText = link.text();
			System.out.println("开始请求" + linkText + ":" + linkHref);
			writer.append(linkText + FIELD_SEPARATOR + linkHref + "\n");
		}
	}

	/**
	 * Downloads every chapter listed in the URL index file and appends it to
	 * the novel file.
	 * <p>
	 * For each index line the chapter page is fetched from the site root plus
	 * the stored href; the text of all {@code div.yd_text2} elements, with
	 * ASCII spaces removed, is written after a line containing the title.
	 * <p>
	 * The index line is split at its <em>last</em> comma so that a title which
	 * itself contains commas is kept intact. Lines without a comma or without
	 * an href are skipped (non-blank ones are reported on standard error)
	 * instead of aborting the whole run.
	 */
	public static void getContext() {
		List<String> indexLines = new FileReader(URL_INDEX_FILE).readLines();
		FileWriter writer = new FileWriter(NOVEL_FILE);

		for (String indexLine : indexLines) {
			int separatorAt = indexLine.lastIndexOf(FIELD_SEPARATOR);
			boolean hasHref = separatorAt >= 0 && separatorAt < indexLine.length() - 1;
			if (!hasHref) {
				if (!indexLine.trim().isEmpty()) {
					System.err.println("Skipping malformed index line: " + indexLine);
				}
				continue;
			}
			String linkText = indexLine.substring(0, separatorAt);
			String linkHref = indexLine.substring(separatorAt + 1);

			String context = HttpUtil.get(SITE_ROOT + linkHref, PAGE_CHARSET);
			Document chapterDoc = Jsoup.parse(context);
			Elements texts = chapterDoc.select("div.yd_text2");

			StringBuilder chapter = new StringBuilder();
			chapter.append("\n").append(linkText).append("\n");
			for (Element text : texts) {
				chapter.append(text.text().replace(" ", ""));
			}
			writer.append(chapter.toString());
			System.out.println("写入完成-" + linkText);
		}
	}
}